Trying out Contrast Enhancement

When using the same data and hyperparameters as "Reproducing Old Training Data", adding contrast enhancement gives nearly identical results.


In [1]:
from itertools import islice
import json

from bubbly.model import Model
from bubbly.dr1 import LocationGenerator
from bubbly.extractors import RingWaveletCompressionExtractor, enhance_contrast
from bubbly.util import summary

from bubbly.util import rfp_curve
import brewer2mpl

%pylab


Welcome to pylab, a matplotlib-based Python environment [backend: module://IPython.kernel.zmq.pylab.backend_inline].
For more information, type 'help(pylab)'.
/Users/beaumont/Library/Python/2.7/lib/python/site-packages/scikits/__init__.py:1: UserWarning: Module argparse was already imported from /opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/argparse.pyc, but /Users/beaumont/Library/Python/2.7/lib/python/site-packages is being added to sys.path
  __import__('pkg_resources').declare_namespace(__name__)

In [2]:
# Feature extractor with enhance_contrast appended to its preprocessing steps.
ex = RingWaveletCompressionExtractor()
ex.preprocessors.append(enhance_contrast)

# Cascade capped at one layer (max_layers=1); the weak-learner
# hyperparameters are passed straight through to Model.
model = Model(ex, LocationGenerator(),
              weak_learner_params=dict(verbose=1, max_depth=1, n_estimators=200, subsample=.4),
              cascade_params=dict(verbose=1, max_layers=1))

# Read the training data inside a context manager so the file handle is
# closed as soon as the JSON has been parsed (the bare open() leaked it).
with open('../models/reproducing_old_training_data.json') as training_file:
    training_data = json.load(training_file)

In [3]:
# Retrain the model on the training data loaded from JSON in the previous cell.
model.retrain(training_data)


WARNING: RuntimeWarning: invalid value encountered in divide [bubbly.util]
WARNING: RuntimeWarning: invalid value encountered in divide [bubbly.util]
WARNING: Non-finite values in feature vectors. Fixing [bubbly.model]
........................................................................................................................................................................................................Cascade round 1. False pos rate: 5.383481e-02. Recall: 9.900442e-01
WARNING: Could not reduce false positive enough after 1 layers. False positive rate: 5.383481e-02. Recall: 9.900442e-01 [bubbly.cascade]
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
No handlers could be found for logger "bubbly.decorators"

In [5]:
# Build feature vectors / labels from the 'pos' and 'neg' entries of the
# first element of the model's stored training data, then print a summary.
first_training_set = model.training_data[0]
x, y = model._make_xy(first_training_set['pos'], first_training_set['neg'])
summary(model.estimator, x, y)


False Positive: 0.019
Recall:         0.973
AUC:            0.977
Accuracy:       0.979

In [6]:
# Evaluate on examples drawn from a separate LocationGenerator.
# NOTE(review): the argument 1 presumably selects a held-out split
# (the variable is named cv_locator) -- confirm against bubbly.dr1.
cv_locator = LocationGenerator(1)

# All positives from this generator, plus the first 10000 items yielded
# by its negatives iterator.
on2 = cv_locator.positives()
negatives_stream = cv_locator.negatives_iterator()
off2 = list(islice(negatives_stream, 10000))

x2, y2 = model._make_xy(on2, off2)
summary(model.estimator, x2, y2)


WARNING: RuntimeWarning: invalid value encountered in divide [bubbly.util]
False Positive: 0.035
Recall:         0.924
AUC:            0.945
Accuracy:       0.962

In [10]:
# 7-step sequential 'Purples' palette, in reversed order.
colors = brewer2mpl.get_map('Purples', 'sequential', 7).mpl_colors[::-1]

# One curve per stage of the staged decision function on the x2/y2 set.
# The palette has a fixed length, so wrap the index instead of raising
# IndexError when there are more stages than colors.
for i, df in enumerate(model.estimator.staged_decision_function(x2)):
    rfp_curve(df, y2, label='CV %i' % i, color=colors[i % len(colors)])

# Overlay the curve computed on the training-set features, in red.
yp = model.estimator.decision_function(x)
rfp_curve(yp, y, color='red', label='Training Data')

# Restrict the y-axis to [0, 0.01].
ylim(0, .01)
legend(loc='upper left')


Out[10]:
<matplotlib.legend.Legend at 0x11a1b35d0>

In [ ]: